5 Functional traits
cluster_kegg_max <- cluster_kegg %>%
filter(!is.na(secondary_cluster)) %>%
select(-overall_strategy) %>%
group_by(secondary_cluster) %>%
summarise(across(everything(), max, na.rm = TRUE))5.1 Trait overview
phylum_heatmap <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
right_join(cluster_taxonomy, by=join_by(phylum == Phylum)) %>%
filter(secondary_cluster %in% cluster_tree$tip.label) %>%
arrange(match(secondary_cluster, cluster_tree$tip.label)) %>%
select(secondary_cluster,phylum) %>%
mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
filter(!is.na(secondary_cluster)) %>%
column_to_rownames(var = "secondary_cluster")
function_tree <- keep.tip(cluster_tree, tip=rownames(phylum_heatmap))
function_table <- cluster_kegg_max %>%
filter(secondary_cluster %in% function_tree$tip.label) %>%
column_to_rownames(var="secondary_cluster")
# Generate basal tree
function_tree <- force.ultrametric(function_tree, method="extend") %>%
ggtree(., size = 0.3) ***************************************************************
* Note: *
* force.ultrametric does not include a formal method to *
* ultrametricize a tree & should only be used to coerce *
* a phylogeny that fails is.ultrametric due to rounding -- *
* not as a substitute for formal rate-smoothing methods. *
***************************************************************
#Add phylum colors next to the tree tips
function_tree <- gheatmap(function_tree, phylum_heatmap, offset=0, width=0.1, colnames=FALSE) +
scale_fill_manual(values=phylum_colors) +
labs(fill="Phylum")
#Reset fill scale to use a different colour profile in the heatmap
function_tree <- function_tree + new_scale_fill()
#Add functions heatmap
function_tree <- gheatmap(function_tree, function_table, offset=0.5, width=3.5, colnames=FALSE) +
vexpand(.08) +
coord_cartesian(clip = "off") +
scale_fill_gradient(low = "#f4f4f4", high = "steelblue", na.value="white") +
labs(fill="GIFT")
#Reset fill scale to use a different colour profile in the heatmap
function_tree <- function_tree + new_scale_fill()
function_tree
## Functional ordination
Rtsne(X=cluster_kegg_max %>% column_to_rownames(var="secondary_cluster"), dims = 2, check_duplicates = FALSE)$Y %>%
as.data.frame() %>%
mutate(secondary_cluster=cluster_kegg_max$secondary_cluster) %>%
rename(tSNE1=V1,tSNE2=V2) %>%
left_join(cluster_taxonomy,by="secondary_cluster") %>%
left_join(cluster_prevalence,by="secondary_cluster") %>%
ggplot(aes(x = tSNE1, y = tSNE2, color = Phylum, size=n_strategy))+
geom_point(shape=16, alpha=0.7) +
scale_color_manual(values=phylum_colors) +
theme_minimal() +
labs(color="Phylum", size="Number of strategies") +
guides(color = guide_legend(override.aes = list(size = 5)))5.2 Trait recovery
all_strategy_clusters <- cluster_prevalence %>%
filter(n_strategy==10) %>%
pull(secondary_cluster)
# Filter clusters recovered in all strategies
cluster_kegg_max_filt <- cluster_kegg_max %>%
filter(secondary_cluster %in% all_strategy_clusters)
cluster_kegg_proportion_max <- cluster_kegg %>%
filter(!is.na(secondary_cluster)) %>%
filter(secondary_cluster %in% all_strategy_clusters) %>%
group_by(secondary_cluster) %>%
mutate(across(where(is.numeric), ~ . / max(., na.rm = TRUE))) %>%
ungroup() %>%
rowwise() %>%
mutate(average = rowMeans(across(where(is.numeric)), na.rm = TRUE)) %>%
select(secondary_cluster,overall_strategy,average)
cluster_kegg_proportion_max %>%
group_by(overall_strategy) %>%
summarise(mean=mean(average),sd=sd(average)) %>%
tt()| overall_strategy | mean | sd |
|---|---|---|
| coassembly_animal | 0.9669740 | 0.03788229 |
| coassembly_timepoint_all | 0.9642462 | 0.04627303 |
| coassembly_timepoint_cage | 0.9695513 | 0.03515391 |
| multicoverage_animal | 0.9686745 | 0.03929870 |
| multicoverage_timepoint_all | 0.9453590 | 0.08602739 |
| multicoverage_timepoint_cage | 0.9704001 | 0.03229802 |
| multisplit_animal | 0.9595563 | 0.03673926 |
| multisplit_timepoint_all | 0.9760940 | 0.02945151 |
| multisplit_timepoint_cage | 0.9589287 | 0.03776599 |
| single_coverage | 0.9622317 | 0.03741529 |
cluster_kegg_proportion_max %>%
left_join(cluster_taxonomy,by="secondary_cluster") %>%
ggplot(aes(x = average, y = overall_strategy, group=overall_strategy, color=Phylum)) +
geom_boxplot(color="#999999", fill="#f4f4f4", outlier.shape = NA) +
scale_color_manual(values=phylum_colors[-c(1,4,6,7)]) +
xlim(0.8, 1)+
geom_jitter(alpha=0.3) +
theme_classic() +
theme() +
labs(x="Function recovery", y="Strategy")cluster_kegg_proportion_max %>%
mutate(secondary_cluster=factor(secondary_cluster,levels=rev(cluster_tree$tip.label[cluster_tree$tip.label %in% cluster_kegg_proportion_max$secondary_cluster]))) %>%
ggplot(aes(x=secondary_cluster,y=overall_strategy,fill=average))+
geom_tile()+
scale_fill_distiller(palette = "YlGnBu", direction = -1) +
theme_minimal() +
theme(axis.text.x = element_blank(),
axis.title.y = element_blank()) +
labs(y="Strategy",x="Clusters",fill="Function recovery")completeness_stats <- bin_metadata %>%
select(genome,completeness,contamination,overall_strategy, assembly, size) %>%
group_by(overall_strategy) %>%
summarise(completeness=mean(completeness))
functions_stats <- cluster_kegg_proportion_max %>%
group_by(overall_strategy) %>%
summarise(functions=mean(average))
full_join(completeness_stats,functions_stats,by="overall_strategy") %>%
ggplot(aes(x=functions,y=completeness, color=overall_strategy))+
geom_point()+
scale_color_manual(values=strategy_colors)+
theme_minimal()